1 Load Data

library(psych) # for SPSS-style PCA
library(paran) # for parallel analyses
## Loading required package: MASS
library(GPArotation) # for robustness checks
library(kableExtra) # for nice tables
library(tidyverse) # for data cleaning
## ── Attaching packages ──────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1     ✔ purrr   0.3.3
## ✔ tibble  2.1.3     ✔ dplyr   0.8.3
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()      masks psych::%+%()
## ✖ ggplot2::alpha()    masks psych::alpha()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ dplyr::select()     masks MASS::select()
# create directory for saving figures
if (!dir.exists("figures")) { dir.create("figures") }

# render missing values as blank cells in kable tables
options(knitr.kable.NA = '')
# show code in the knitted document, but suppress warnings and messages
knitr::opts_chunk$set(echo = TRUE, 
                      warning = FALSE, 
                      message = FALSE)

# record the R version used for this analysis
R.version.string
## [1] "R version 3.6.0 (2019-04-26)"
# fixed seed for reproducibility
set.seed(8675309)

1.1 Simulate Study Data (for Stage 1 RR)

See https://osf.io/87rbg/ for Stage 1 RR code. The code below is modified from the original to account for a different raw data structure and to add additional tables and graphs. All analysis code is identical.

1.2 Load Study Data (for Stage 2 RR)

Load study data and demographic questionnaires from the data folder.

# raw session, questionnaire, and experiment (rating) data
session <- read_csv("data/psa001_session.csv")
dat_quest <- read_csv("data/psa001_quest_data.csv")
dat_exp <- read_csv("data/psa001_exp_data.csv")

# reshape questionnaire data to make wide:
# one row per session x user, one column per question; if an item was
# answered more than once, keep only the earliest answer (sort by endtime,
# keep the first row per session/user/question)
quest <- dat_quest %>%
  select(session_id, endtime, user_id, q_name, dv) %>%
  group_by(session_id, user_id, q_name) %>%
  arrange(endtime) %>%
  filter(row_number() == 1) %>%
  ungroup() %>%
  spread(q_name, dv, convert = TRUE) # convert = TRUE guesses column types

Join experiment and questionnaire data

# Join experiment trials to session info, parse the experiment and project
# names into their components, attach questionnaire answers, and expand the
# 3-letter trait codes into full trait names.
ratings_raw <- dat_exp %>%
  left_join(session, by = c("user_id", "session_id")) %>%
  filter(user_status %in% c("guest", "registered")) %>% # drop other accounts
  # exp_name encodes a psa prefix, language, trait code, and block ("_"-sep)
  separate(exp_name, c("psa", "language", "trait", "block"), 
           sep = "_") %>%
  select(-psa) %>%
  # proj_name encodes a psa prefix, language, and a 2-part lab id; the
  # second lab part can be absent (fill = "right")
  separate(proj_name, c("psa", "lang", "lab1", "lab2"),
           sep = "_", fill = "right") %>%
  filter(lab1 != "test") %>% # remove test-lab sessions
  unite(lab_id, c("lab1", "lab2")) %>%
  # FIX: original read `select(-psa, lang)`, a typo that kept lang; both
  # helper columns are meant to be dropped. Harmless downstream because the
  # later select() never included lang, but corrected for clarity.
  select(-psa, -lang) %>%
  left_join(quest, by = c("session_id", "user_id")) %>%
  select(language, user_id = session_id, trait, 
         stim_id = trial_name, 
         order, rt, rating = dv,
         country, sex, age, ethnicity, lab = lab_id, block) %>%
  # expand the 3-letter trait codes to full trait names
  mutate(trait = recode(trait,
                        "Res" = "responsible",
                        "Wei" = "weird",
                        "Old" = "old",
                        "Tru" = "trustworthy",
                        "Dom" = "dominant",
                        "Emo" = "emostable",
                        "Agg" = "aggressive",
                        "Car" = "caring",
                        "Int" = "intelligent",
                        "Unh" = "unhappy",
                        "Soc" = "sociable",
                        "Mea" = "mean",
                        "Con" = "confident",
                        "Att" = "attractive"
  ))

write_csv(ratings_raw, "data/psa001_ratings_raw.csv")

1.3 Load Auxiliary Data

Data on regions and stimuli.

1.3.1 Load Region Data

regions <- read_csv("data/regions.csv") # country-to-region lookup (joined by "country" below)

1.3.2 Load Stimulus Info

# face stimulus info, with readable ethnicity/gender labels recoded from the
# single-letter Race/Gender codes
stim_info <- read_csv("data/psa001_cfd_faces.csv") %>%
  mutate(ethnicity = recode(Race, "A" = "asian", "B" = "black", "L" = "latinx", "W" = "white"),
         gender = recode(Gender, "M" = "male", "F" = "female")
  )

# stimulus demographics: n, mean age, and SD of age per ethnicity x gender cell
stim_info %>%
  group_by(ethnicity, gender) %>%
  summarise(
    n = n(),
    mean_age = round(mean(Age), 2),
    sd_age = round(sd(Age), 2)
  ) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
ethnicity gender n mean_age sd_age
asian female 15 26.15 3.33
asian male 15 26.40 3.21
black female 15 27.00 3.51
black male 15 28.07 4.27
latinx female 15 25.27 2.42
latinx male 15 26.31 4.00
white female 15 25.77 3.03
white male 15 26.06 4.46
# Stimulus-set descriptives referenced inline in the text below.
stim_n_male   <- sum(stim_info$gender == "male")   # number of male faces
stim_n_female <- sum(stim_info$gender == "female") # number of female faces

stim_ages <- stim_info$Age
mean_age  <- round(mean(stim_ages), 2)
sd_age    <- round(sd(stim_ages), 2)
min_age   <- min(stim_ages)
max_age   <- max(stim_ages)

Stimuli in our study will be an open-access, full-color, face image set consisting of 60 men and 60 women (mean age=26.38 years, SD=3.57 years, range=18.73 to 34.93 years), taken under standardized photographic conditions (Ma et al., 2015).

1.3.3 Load O&T 2008 Data

# read original OT data and get into same format as data_agg will be

# the 13 trait names present in the study data ("old" is excluded from the
# trait model), alphabetically ordered
traits <- ratings_raw %>%
  filter(trait != "old", !is.na(trait)) %>%
  arrange(trait) %>%
  pull(trait) %>%
  unique()
  
# Oosterhof & Todorov (2008) Karolinska judgments, relabeled to match the
# later data_agg layout: one row per stimulus, one column per trait, plus a
# pseudo-"region" label used to carry this dataset through the analyses
ot_data <- readxl::read_excel("data/Karolinska_14trait_judgmentswithlabels.xls") %>%
  mutate(region = "(Oosterhof & Todorov, 2008)") %>%
  rename(stim_id = `Todorov Label`,
         emostable = `emotionally stable`) %>%
  # NOTE(review): `traits` is a bare external character vector inside
  # select(); newer tidyselect versions expect all_of(traits) — fine for the
  # 2019 package versions recorded above
  select(region, stim_id, traits)

1.4 Data Processing

1.4.1 Join Data

# attach world regions: the lab id (e.g. "USA_001") encodes the lab's
# country, which joins to the regions lookup; the participant-reported
# country is renamed qcountry to avoid a name clash; "old" ratings dropped
ratings <- ratings_raw %>%
  rename(qcountry = country) %>%
  separate(lab, c("country", "lab")) %>%
  left_join(regions, by = "country") %>%
  filter(trait != "old")

1.4.2 Graph distributions for trait by region

# plot styles
bgcolor <- "white"
textcolor <- "black"
PSA_theme <- theme(
    plot.background = element_rect(fill = bgcolor, color = NA),
    panel.background = element_rect(fill = NA, color = "grey"),
    legend.background = element_rect(fill = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    text = element_text(color = textcolor, size=15),
    axis.text = element_text(color = textcolor, size=10),
    strip.text.y = element_text(angle = 0, hjust = 0)
  )
ggplot(ratings, aes(rating, fill = trait)) +
  geom_histogram(binwidth = 1, color = "grey", show.legend = F) +
  facet_grid(region~trait, space = "free") +
  scale_x_continuous(breaks = 1:9) +
  PSA_theme

1.5 Data checks

# per-participant x trait summary: number of rating trials and number of
# distinct stimuli rated (demographics ride along as extra grouping keys)
part <- ratings %>%
  group_by(user_id, sex, age, country, language, trait, region, lab) %>%
  summarise(trials = n(),
            stim_n = n_distinct(stim_id)) %>%
  ungroup()

1.5.1 How many participants completed at least one rating for each of 120 stimuli

# per region, how many participant/trait rows covered all 120 stimuli
part %>% 
  mutate(n120 = ifelse(stim_n == 120, "rated all 120", "rated < 120")) %>%
  count(region, n120) %>%
  spread(n120, n) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
region rated < 120 rated all 120
Africa 12 553
Asia 49 822
Australia & New Zealand 21 1105
Central America & Mexico 32 465
Eastern Europe 106 847
Middle East 14 433
Scandinavia 48 689
South America 31 1349
UK 4 382
USA & Canada 66 3528
Western Europe 39 1973

1.5.2 Participants who did not complete exactly 240 trials

# classify trial counts; case_when evaluates in order, so "< 120" is
# caught before the broader "< 240" condition
part %>% 
  mutate(n240 = case_when(
    trials == 240 ~ "rated 240", 
    trials > 240 ~ "rated > 240",
    trials < 120 ~ "rated < 120",
    trials < 240 ~ "rated 120-239"
  )) %>%
  count(region, n240) %>%
  spread(n240, n, fill = 0) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
region rated < 120 rated > 240 rated 120-239 rated 240
Africa 10 4 103 448
Asia 47 9 209 606
Australia & New Zealand 18 5 222 881
Central America & Mexico 28 0 160 309
Eastern Europe 104 0 224 625
Middle East 13 3 70 361
Scandinavia 47 2 150 538
South America 31 7 242 1100
UK 4 2 47 333
USA & Canada 65 2 382 3145
Western Europe 37 4 170 1801

1.5.3 Participants with low-variance responses in block 1

identical_rating_threshold <- 0.75 * 120 # use this for registered analyses

# participants whose block-1 responses were near-invariant: their single
# most common rating was given to at least 75% of the 120 stimuli
inv_participants <- ratings %>%
  filter(block == 1) %>%
  count(user_id, region, trait, rating) %>%
  group_by(user_id, region, trait) %>%
  filter(n == max(n)) %>% # find most common rating for each P
  ungroup() %>%
  filter(n >= identical_rating_threshold) # select Ps who gave the same rating to >= 75% of stimuli

# counts of low-variance participants by trait x region, plus a row-wise TOTAL
inv <- inv_participants %>%
  count(region, trait) %>%
  spread(region, n, fill = 0) %>%
  mutate(TOTAL = rowSums(select_if(., is.numeric), na.rm = T))

# column totals; the original piped a bare `group_by(inv)` (a zero-variable,
# no-op grouping) into summarise_if — equivalent and clearer without it
inv_total <- inv %>%
  summarise_if(is.numeric, sum, na.rm = T) %>%
  mutate(trait = "TOTAL")
 
bind_rows(inv, inv_total) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
trait Africa Asia Australia & New Zealand Central America & Mexico Eastern Europe Middle East Scandinavia South America UK USA & Canada Western Europe TOTAL
aggressive 4 5 16 0 2 3 2 4 4 21 14 75
attractive 2 4 3 4 1 3 3 12 0 16 9 57
caring 1 0 2 1 1 1 0 5 0 16 3 30
confident 4 1 0 2 1 1 1 3 0 4 2 19
dominant 4 4 0 2 2 0 1 1 0 10 4 28
emostable 3 3 5 1 1 2 4 3 4 14 10 50
intelligent 2 5 4 1 5 1 4 3 2 15 7 49
mean 2 4 6 3 5 2 6 5 3 27 17 80
responsible 0 3 5 0 3 0 2 5 3 19 8 48
sociable 1 1 0 1 0 0 1 4 0 12 3 23
trustworthy 3 6 2 2 5 2 6 5 3 15 15 64
unhappy 1 3 6 0 2 1 3 4 0 12 5 37
weird 6 3 12 6 10 3 5 12 1 36 13 107
TOTAL 33 42 61 23 38 19 38 66 20 217 110 667

1.5.4 Participants with no region

# sanity check: participant rows whose lab country matched no region
# (expected, and confirmed, to be empty)
part %>% 
  filter(is.na(region)) %>%
  select(user_id, country, lab)
## # A tibble: 0 x 3
## # … with 3 variables: user_id <dbl>, country <chr>, lab <chr>

1.5.5 Remove excluded data and average ratings

# apply the registered exclusions, then average each participant's ratings
# of each stimulus (across the two blocks)
data <- ratings %>%
  group_by(user_id, trait) %>%
  filter(
    # did not complete 1+ ratings for each of 120 stimuli
    dplyr::n_distinct(stim_id) == 120,      
    !is.na(region)   # did not specify region (none expected)
  ) %>%
  # note: matching on user_id only, so a participant flagged as low-variance
  # for any trait is removed entirely
  anti_join(inv_participants, by = "user_id") %>% # exclude Ps with low variance
  ungroup() %>%
  group_by(user_id, age, sex, ethnicity, language, lab, country, region, trait, stim_id) %>%
  summarise(rating = mean(rating)) %>% # average ratings across 2 blocks
  ungroup()

write_csv(data, "data/psa001_ind.csv")

1.6 Participant Demographics

# number of participants tested in each questionnaire language: reduce to
# one row per unique participant/language pair, then tally per language
data %>%
  distinct(user_id, language) %>%
  count(language) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
language n
EL 99
ENG 5860
ES-PE 115
FAS 48
FR-BE 86
FR-CH 109
FRE 211
GER 452
HU 174
ITA 153
NL 235
NOR 326
POL 36
PT 76
PT-BR 201
RO 27
RU 240
SLO 263
SPA 1680
SRP 69
SV 198
THA 81
TUR 275
ZH-CN 101
ZH-S 366
# participants per region, with a grand-total row appended
by_region <- data %>%
  group_by(user_id, region) %>%
  summarise() %>% # one row per unique participant x region
  ungroup() %>%
  group_by(region) %>%
  summarise(n = n()) %>%
  add_row(region = "TOTAL", n = n_distinct(data$user_id)) %>%
  knitr::kable("html") %>%
  kable_styling("striped")

save_kable(by_region, "figures/n_by_region.html")
           
by_region
region n
Africa 520
Asia 780
Australia & New Zealand 1044
Central America & Mexico 443
Eastern Europe 809
Middle East 414
Scandinavia 653
South America 1283
UK 361
USA & Canada 3312
Western Europe 1862
TOTAL 11481

1.6.1 Age and sex distribution per region

# age histograms (5-year bins) colored by sex, one panel per region,
# annotated with the per-region participant count
data %>%
  group_by(user_id, sex, age, region) %>%
  summarise() %>% # one row per participant
  ungroup() %>%
  group_by(region) %>%
  mutate(n = n()) %>% # participants per region, for the text label
  ungroup() %>%
  ggplot(aes(as.numeric(age), fill = sex)) +
  geom_histogram(binwidth = 5, color = "grey") +
  geom_text(aes(x=0, y=5, label = paste0("n=",n)), color = "black") +
  labs(title="", y="", x="Participant age in 5-year bins") +
  facet_grid(region~., scales="free_y") +
  PSA_theme

1.6.2 Participants per trait per region

# distinct participants per trait within each region; the horizontal line
# marks n = 15
data %>%
  group_by(trait, region) %>%
  summarise(n = n_distinct(user_id)) %>%
  ggplot(aes(trait, n)) +
  geom_col(aes(fill = trait), show.legend = F) +
  geom_hline(yintercept = 15) +
  facet_grid(region~., scale = "free") +
  labs(title="", x="", y="Participants per trait per region") +
  theme( axis.text.x = element_text(angle = -45, hjust = 0) ) + 
  PSA_theme

ggsave("figures/participants_per_trait_per_region.png", width = 15, height = 8)

1.6.3 Participants per lab

# participant counts per lab: rebuild the COUNTRY_lab id, reduce to unique
# region/lab/participant rows, then tally participants within region x lab
labs <- data %>%
  unite(lab, country, lab) %>%
  distinct(region, lab, user_id) %>%
  count(region, lab) %>%
  arrange(region, lab)

knitr::kable(labs) %>%
  kable_styling("striped")
region lab n
Africa KEN_001 166
Africa KEN_002 181
Africa NGA_001 43
Africa RSA_001 47
Africa RSA_002 83
Asia CHN_001 45
Asia CHN_006 121
Asia CHN_007 56
Asia CHN_014 94
Asia MAS_001 70
Asia MAS_002 53
Asia MAS_003 44
Asia MAS_004 114
Asia TAI_001 31
Asia TAI_002 71
Asia THA_001 81
Australia & New Zealand AUS_004 80
Australia & New Zealand AUS_005 88
Australia & New Zealand AUS_006 193
Australia & New Zealand AUS_007 251
Australia & New Zealand AUS_008 103
Australia & New Zealand AUS_011 84
Australia & New Zealand AUS_014 86
Australia & New Zealand NZL_001 20
Australia & New Zealand NZL_002 139
Central America & Mexico ECU_001 105
Central America & Mexico MEX_002 182
Central America & Mexico MEX_003 67
Central America & Mexico SLV_001 89
Eastern Europe HUN_001 174
Eastern Europe POL_001 36
Eastern Europe ROU_001 27
Eastern Europe RUS_005 240
Eastern Europe SRB_002 69
Eastern Europe SVK_001 79
Eastern Europe SVK_002 184
Middle East IRI_001 48
Middle East TUR_001 74
Middle East TUR_003 102
Middle East TUR_007 11
Middle East TUR_009 99
Middle East UAE_001 80
Scandinavia DNK_001 92
Scandinavia FIN_001 46
Scandinavia NOR_001 139
Scandinavia NOR_002 111
Scandinavia NOR_003 63
Scandinavia NOR_004 50
Scandinavia SWE_004 59
Scandinavia SWE_005 40
Scandinavia SWE_006 53
South America ARG_001 91
South America BRA_001 113
South America BRA_003 88
South America CHI_001 163
South America CHI_003 89
South America CHI_004 88
South America CHI_005 90
South America COL_003 47
South America COL_004 399
South America PER_001 63
South America PER_002 52
UK UK_001 34
UK UK_005 102
UK UK_006 44
UK UK_011 65
UK UK_018 38
UK UK_022 34
UK UK_024 44
USA & Canada CAN_001 45
USA & Canada CAN_008 67
USA & Canada CAN_015 83
USA & Canada CAN_017 127
USA & Canada CAN_018 308
USA & Canada PSA_001 80
USA & Canada PSA_002 329
USA & Canada USA_001 90
USA & Canada USA_003 81
USA & Canada USA_005 39
USA & Canada USA_011 35
USA & Canada USA_014 65
USA & Canada USA_020 194
USA & Canada USA_025 91
USA & Canada USA_026 81
USA & Canada USA_030 98
USA & Canada USA_031 84
USA & Canada USA_033 55
USA & Canada USA_036 43
USA & Canada USA_038 77
USA & Canada USA_039 123
USA & Canada USA_042 31
USA & Canada USA_050 158
USA & Canada USA_051 142
USA & Canada USA_054 90
USA & Canada USA_055 113
USA & Canada USA_065 51
USA & Canada USA_067 21
USA & Canada USA_075 122
USA & Canada USA_083 17
USA & Canada USA_113 120
USA & Canada USA_114 104
USA & Canada USA_115 52
USA & Canada USA_116 48
USA & Canada USA_117 48
Western Europe AUT_001 100
Western Europe AUT_002 86
Western Europe AUT_005 50
Western Europe BEL_001 86
Western Europe ESP_001 108
Western Europe ESP_005 117
Western Europe ESP_006 45
Western Europe FRA_003 75
Western Europe FRA_004 44
Western Europe FRA_005 32
Western Europe FRA_006 60
Western Europe GER_011 50
Western Europe GER_012 61
Western Europe GER_015 97
Western Europe GER_017 58
Western Europe GRE_002 99
Western Europe ITA_001 89
Western Europe ITA_003 64
Western Europe NED_008 121
Western Europe NED_009 235
Western Europe POR_001 76
Western Europe SUI_003 109

2 Analyses

2.1 Main Analysis

First, we will calculate the average rating for each face separately for each of the 13 traits. Like Oosterhof and Todorov (2008), we will then subject these mean ratings to principal component analysis with orthogonal components and no rotation. Using the criteria reported in Oosterhof and Todorov’s (2008) paper, we will retain and interpret the components with an Eigenvalue > 1.

2.1.1 Calculate Alphas

# takes a long time, so saves the results and loads from a file in the next chunk if set to eval = FALSE
# Cronbach's (standardized) alpha per trait x region; each nested data frame
# has one row per rater and one column per stimulus, and is transposed so
# psych::alpha treats raters as "items" (i.e., inter-rater consistency)
data_alpha <- data %>%
  select(user_id, region, stim_id, rating, trait) %>%
  spread(stim_id, rating, sep = "_") %>% # one column per stimulus
  group_by(trait, region) %>%
  nest() %>%
  mutate(alpha = map(data, function(d) {
    if (dim(d)[1] > 2) { # need more than 2 raters
      # calculate cronbach's alpha
      subdata <- d %>%
        as_tibble() %>%
        select(-user_id) %>%
        t() # rows = stimuli, columns = raters

      # capture.output silences psych::alpha's console printing
      capture.output(suppressWarnings(a <- psych::alpha(subdata)))
      a$total["std.alpha"] %>% pluck(1) %>% round(3)
    } else {
      NA
    }
  })) %>%
  select(-data) %>%
  unnest(alpha) %>%
  ungroup()

saveRDS(data_alpha, file = "data/alphas.RDS")
data_alpha <- readRDS("data/alphas.RDS")

# combine participant counts with the alphas; adjust region labels for display
n_alpha <- data %>%
  select(user_id, region, trait) %>%
  distinct() %>%
  count(region, trait) %>% # participants per region x trait
  left_join(data_alpha, by = c("region", "trait")) %>%
  mutate(
    trait = as.factor(trait),
    region = str_replace(region, " (and|&) ", " &\n"), # wrap long labels
    region = as.factor(region),
    region = factor(region, levels = rev(levels(region))) # reverse axis order
  )

# HTML table of alpha and n per region x trait
n_alpha %>%
  mutate(stat = paste("α =", alpha, "<br>n =", n)) %>%
  select(Region = region, stat, trait) %>%
  spread(trait, stat) %>%
  knitr::kable("html", escape = FALSE) %>%
  column_spec(2:14, width = "7%") %>%
  kable_styling("striped", font_size = 9) %>%
  save_kable("figures/alpha.html")
# tile plot; fill encodes alpha >= .7 — with these manual values FALSE maps
# to white and TRUE to red
ggplot(n_alpha) +
  geom_tile(aes(trait, region, fill=alpha >=.7), 
           color = "grey20", show.legend = F) +
  geom_text(aes(trait, region, label=sprintf("α = %0.2f\nn = %.0f", alpha, n)), color = "black", size = 5) +
  scale_y_discrete(drop=FALSE) +
  scale_x_discrete(position = "top") +
  labs(x="", y="", title="") +
  scale_fill_manual(values = c("white", "red")) +
  PSA_theme

ggsave("figures/alphas.png", width = 18, height = 10)

2.1.2 Calculate Aggregate Scores

# mean rating per stimulus for each trait within each region, in wide format
# (one column per trait) to match the O&T data layout
data_agg <- data %>%
  group_by(region, trait, stim_id) %>%
  summarise(rating = mean(rating)) %>%
  ungroup() %>%
  spread(trait, rating)

write_csv(data_agg, "data/psa001_agg.csv")
# density of the aggregated ratings per trait x region
data_agg %>%
  gather("trait", "rating", aggressive:weird) %>%
  ggplot(aes(rating, fill = trait)) +
  geom_density(show.legend = F) +
  facet_grid(region~trait) +
  PSA_theme

ggsave("figures/agg_scores.png", width = 15, height = 8)

2.1.3 Principal Component Analysis (PCA)

The number of components to extract was determined using eigenvalues > 1 for each world region. PCA was conducted using the psych::principal() function with rotate="none".

# Run an SPSS-style (unrotated) principal component analysis on one region's
# aggregated trait ratings, following Oosterhof & Todorov (2008).
#
# d: data frame with a stim_id column plus one column per trait; columns
#    that are entirely NA (traits missing for this region) are dropped.
# Returns a long data frame with columns rowname (trait name or variance
# statistic), type ("trait" or "stat"), pc ("PC1", ...), and loading.
psa_pca <- function(d) {
  # trait columns only; keep a column when it has at least one non-NA value
  trait_cols <- select(d, -stim_id) %>%
    select_if(colSums(!is.na(.)) > 0)

  # retain components with eigenvalue > 1 (O&T's extraction criterion)
  eigenvalues <- eigen(cor(trait_cols))$values
  n_components <- sum(eigenvalues > 1)

  pca <- principal(trait_cols, nfactors = n_components, rotate = "none")

  # variance-accounted statistics, tagged so they can be told apart from
  # trait loadings once the two tables are stacked
  var_stats <- pca$Vaccounted %>%
    as.data.frame() %>%
    rownames_to_column() %>%
    mutate(type = "stat")

  loading_rows <- unclass(pca$loadings) %>%
    as.data.frame() %>%
    rownames_to_column() %>%
    mutate(type = "trait")

  # stack loadings and stats, then reshape the PC columns into long format
  loading_rows %>%
    bind_rows(var_stats) %>%
    gather("pc", "loading", 2:(ncol(.) - 1))
}
# run the PCA separately for each region, with the O&T 2008 data included as
# a pseudo-region baseline; relabel "PC1" -> "Component 1" etc. for display
pca_analyses <- data_agg %>%
  bind_rows(ot_data) %>%
  group_by(region) %>%
  nest() %>%
  mutate(pca = map(data, psa_pca)) %>%
  select(-data) %>%
  unnest(pca) %>%
  ungroup() %>%
  mutate(pc = str_replace(pc, "PC", "Component "))

2.1.3.1 Number of Components (and proportion variance) by region

# number of retained components and proportion of variance explained by
# each, per region
pca_analyses %>%
  filter(rowname == "Proportion Var") %>%
  group_by(region) %>%
  mutate(nPCs = n()) %>% # one "Proportion Var" row per retained component
  ungroup() %>%
  spread(pc, loading) %>%
  select(-rowname, -type) %>%
  mutate_if(is.numeric, round, 3) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
region nPCs Component 1 Component 2 Component 3
(Oosterhof & Todorov, 2008) 2 0.633 0.183
Africa 2 0.566 0.193
Asia 3 0.644 0.179 0.090
Australia & New Zealand 3 0.627 0.169 0.099
Central America & Mexico 3 0.563 0.183 0.085
Eastern Europe 3 0.646 0.165 0.088
Middle East 3 0.524 0.242 0.089
Scandinavia 3 0.636 0.181 0.081
South America 2 0.591 0.212
UK 3 0.598 0.180 0.098
USA & Canada 3 0.658 0.172 0.085
Western Europe 3 0.650 0.192 0.077

2.1.3.2 Trait Loadings by Region and Component

# order traits by P1 loading if loads positively on P1, or by -P2 loading otherwise
trait_order <- pca_analyses %>%
  filter(region == "(Oosterhof & Todorov, 2008)", type == "trait") %>%
  spread(pc, loading) %>% 
  arrange(ifelse(`Component 1`>0,`Component 1`,-`Component 2`)) %>% 
  pull(rowname)

# proportion-of-variance rows, used as an extra annotation row in the plot
pca_prop_var <- pca_analyses %>%
  filter(rowname == "Proportion Var") %>%
  select(-rowname, -type) %>%
  mutate(loading = round(loading, 2))

# heatmap of trait loadings per component, one panel per region; the y = 14
# row shows the proportion of variance for each component
pca_analyses %>%
  filter(type == "trait") %>%
  select(-type) %>%
  mutate(
    trait = as.factor(rowname),
    trait = factor(trait, levels = c(trait_order, "Prop.Var")),
    loading = round(loading, 2)
  ) %>%
  ggplot() +
  geom_tile(aes(pc, trait, fill=loading), show.legend = F) +
  geom_text(aes(pc, trait, label=sprintf("%0.2f", loading)), color = "black") +
  geom_text(data = pca_prop_var, aes(pc, y = 14, label=sprintf("%0.2f", loading)), color = "black") +
  scale_y_discrete(drop=FALSE) +
  scale_x_discrete(position = "top") + 
  scale_fill_gradient2(low = "dodgerblue", mid = "grey90", high = "#FF3333", limits=c(-1.1, 1.1)) +
  facet_wrap(~region, scales = "fixed", ncol = 4) +
  labs(x = "", y = "", title="") +
  PSA_theme

ggsave("figures/PCA_loadings.png", width = 15, height = 10)

2.1.3.3 Replication Criteria (PCA)

Oosterhof and Todorov’s valence-dominance model will be judged to have been replicated in a given world region if the first two components both have Eigenvalues > 1, the first component (i.e., the one explaining more of the variance in ratings) is correlated strongly (loading > .7) with trustworthiness and weakly (loading < .5) with dominance, and the second component (i.e., the one explaining less of the variance in ratings) is correlated strongly (loading > .7) with dominance and weakly (loading < .5) with trustworthiness. All three criteria need to be met to conclude that the model was replicated in a given world region.

# apply the registered replication criteria to the trustworthy/dominant
# loadings on the first two components
pca_rep <- pca_analyses %>%
  filter(
    type == "trait", 
    rowname %in% c("trustworthy", "dominant"),
    pc %in% c("Component 1", "Component 2")
  ) %>%
  select(-type) %>%
  mutate(rowname = paste(pc, rowname)) %>%
  select(-pc) %>%
  spread(rowname, loading) %>%
  rename(Region = region) %>%
  # replicated iff Component 1 loads strongly on trustworthy (> .7) but
  # weakly on dominant (< .5), and vice versa for Component 2
  mutate(Replicated = ifelse(
    `Component 1 dominant` < .5 & `Component 1 trustworthy` > .7 & 
    `Component 2 dominant` > .7 & `Component 2 trustworthy` < .5,
    "Yes", "No"
  )) %>%
  mutate_if(is.numeric, round, 3) %>%
  knitr::kable("html", col.names = c("Region", "Dominant", "Trustworthy", "Dominant", "Trustworthy", "Replicated")) %>%
  add_header_above(c(" " = 1, "Component 1" = 2, "Component 2" = 2, "  " = 1)) %>%
  kable_styling("striped")

save_kable(pca_rep, "figures/PCA_rep_criteria.html")

pca_rep
Component 1
Component 2
Region Dominant Trustworthy Dominant Trustworthy Replicated
(Oosterhof & Todorov, 2008) -0.244 0.941 0.929 -0.060 Yes
Africa 0.271 0.924 0.843 -0.065 Yes
Asia 0.370 0.922 0.863 -0.006 Yes
Australia & New Zealand 0.257 0.943 0.907 -0.076 Yes
Central America & Mexico -0.030 0.913 0.923 -0.066 Yes
Eastern Europe 0.599 0.938 0.755 -0.113 No
Middle East 0.528 0.816 0.778 -0.394 No
Scandinavia 0.392 0.953 0.881 -0.121 Yes
South America 0.343 0.899 0.894 -0.155 Yes
UK 0.331 0.944 0.851 -0.121 Yes
USA & Canada 0.406 0.966 0.841 -0.073 Yes
Western Europe 0.357 0.957 0.875 -0.166 Yes

2.1.4 Factor Congruence (PCA)

This analysis determines the congruence between the components from Oosterhof & Todorov (2008) and the components in each world region, using the psych::factor.congruence function. Congruence is labeled “not similar” for values < 0.85, “fairly similar” for values < 0.95, and “equal” for values >= 0.95.

# get loadings for original O&T2008
ot2008_pca_loadings <- pca_analyses %>%
  filter(region == "(Oosterhof & Todorov, 2008)", type == "trait") %>%
  select(-region, -type) %>%
  spread(pc, loading) %>%
  column_to_rownames() # trait names become rownames

# run factor congruence for each region  
fc_pca <- pca_analyses %>%
  filter(type == "trait", region != "(Oosterhof & Todorov, 2008)") %>%
  select(-type) %>%
  spread(pc, loading) %>%
  group_by(region) %>%
  nest() %>%
  mutate(fc = map(data, function(d) {
    # this region's first two components, rows ordered by trait name
    loadings <- d %>%
      as.data.frame() %>%
      select(rowname, `Component 1`, `Component 2`) %>%
      arrange(rowname) %>%
      column_to_rownames()
    
    psych::factor.congruence(loadings, 
                             ot2008_pca_loadings, 
                             digits = 4) %>%
      as.data.frame() %>%
      rownames_to_column(var = "regionPC")
  })) %>%
  select(-data) %>%
  unnest(fc) %>%
  ungroup()

# label congruence values and build the comparison table; only diagonal
# pairings (region C1 vs O&T C1, region C2 vs O&T C2) are reported
pc_fc_table <- fc_pca %>%
  gather(origPC, congruence, `Component 1`:`Component 2`) %>%
  mutate(sig = case_when(
           congruence < .85 ~ "not similar",
           congruence < .95 ~ "fairly similar",
           congruence >= .95 ~ "equal"
         ),
         congruence = sprintf("%0.3f", congruence)) %>%
  filter(regionPC == origPC) %>%
  select(region, PC = regionPC, congruence, sig) %>%
  gather(k, v, congruence, sig) %>%
  unite(PC, PC, k, remove = T) %>%
  spread(PC, v) %>%
  knitr::kable("html", digits = 3, align = 'lrlrl', escape = F,
               col.names = c("Region", "Loading", "Congruence", "Loading", "Congruence")) %>%
  add_header_above(c(" " = 1, "Component 1" = 2, "Component 2" = 2)) %>%
  kable_styling("striped")

save_kable(pc_fc_table, "figures/PCA_factor_congruence.html")

pc_fc_table
Component 1
Component 2
Region Loading Congruence Loading Congruence
Africa 0.980 equal 0.947 fairly similar
Asia 0.974 equal 0.843 not similar
Australia & New Zealand 0.982 equal 0.959 equal
Central America & Mexico 0.993 equal 0.953 equal
Eastern Europe 0.953 equal 0.948 fairly similar
Middle East 0.944 fairly similar 0.853 fairly similar
Scandinavia 0.973 equal 0.960 equal
South America 0.973 equal 0.948 fairly similar
UK 0.976 equal 0.938 fairly similar
USA & Canada 0.972 equal 0.952 equal
Western Europe 0.975 equal 0.936 fairly similar

2.2 Robustness Checks

2.2.1 Exploratory Factor Analysis (EFA)

The number of factors to extract was determined using parallel analysis (paran::paran()) for each world region. EFA was conducted using the psych::fa() function with all default options.

# function to calculate EFA

# Robustness check: exploratory factor analysis for one region's aggregated
# ratings. The number of factors comes from parallel analysis (paran); the
# EFA itself uses psych::fa with default options. Returns the same long
# format as psa_pca (rowname, type, mr, loading).
psa_efa <- function(d) {
  traits <- select(d, -stim_id) %>% 
    select_if(colSums(!is.na(.)) > 0) # omits missing traits
  
  # Parallel Analysis with Dino's 'paran' package. 
  nfactors <- paran(traits, iterations = 5000, 
          centile = 0, quietly = TRUE, 
          status = FALSE, all = TRUE, 
          cfa = TRUE, graph = FALSE)
  
  efa <- psych::fa(traits, nfactors$Retained) 
  
  # variance-accounted statistics, tagged to distinguish from trait loadings
  stats <- efa$Vaccounted %>% 
    as.data.frame() %>%
    rownames_to_column() %>%
    mutate(type = "stat")
  
  # stack loadings and stats, then reshape factor columns into long format
  unclass(efa$loadings) %>%
    as.data.frame() %>%
    rownames_to_column() %>%
    mutate(type = "trait") %>%
    bind_rows(stats) %>%
    gather("mr", "loading", 2:(ncol(.)-1))
}

Calculate for each region

# run the EFA separately for each region, with O&T 2008 included as a
# pseudo-region baseline; relabel "MR1" -> "Factor 1" etc. for display
efa_analyses <- data_agg %>%
  bind_rows(ot_data) %>%
  group_by(region) %>%
  nest() %>%
  mutate(efa = map(data, psa_efa)) %>%
  select(-data) %>%
  unnest(efa) %>%
  ungroup() %>%
  mutate(mr = str_replace(mr, "MR", "Factor "))

2.2.1.1 Number of Factors (and proportion variance) by region

Note: the analysis for USA & Canada produced a warning: “The estimated weights for the factor scores are probably incorrect. Try a different factor extraction method.”

# number of retained factors and proportion of variance explained by each,
# per region
efa_analyses %>%
  filter(rowname == "Proportion Var") %>%
  group_by(region) %>%
  mutate(nMRs = n()) %>% # one "Proportion Var" row per retained factor
  ungroup() %>%
  spread(mr, loading) %>%
  select(-rowname, -type) %>%
  mutate_if(is.numeric, round, 3) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
region nMRs Factor 1 Factor 2 Factor 3 Factor 4
(Oosterhof & Todorov, 2008) 2 0.564 0.227
Africa 3 0.371 0.158 0.246
Asia 3 0.358 0.372 0.157
Australia & New Zealand 4 0.302 0.154 0.216 0.225
Central America & Mexico 4 0.194 0.220 0.235 0.167
Eastern Europe 3 0.491 0.191 0.191
Middle East 3 0.271 0.263 0.275
Scandinavia 3 0.405 0.196 0.268
South America 4 0.204 0.249 0.242 0.192
UK 4 0.246 0.185 0.264 0.177
USA & Canada 4 0.271 0.204 0.246 0.197
Western Europe 4 0.269 0.171 0.245 0.244

2.2.1.2 Trait Loadings by Region and Factor

# proportion-of-variance rows, used as an extra annotation row in the plot
efa_prop_var <- efa_analyses %>%
  filter(rowname == "Proportion Var") %>%
  select(-rowname, -type) %>%
  mutate(loading = round(loading, 2))

# heatmap of trait loadings per factor, one panel per region; the y = 14
# row shows the proportion of variance for each factor
efa_analyses %>%
  filter(type == "trait") %>%
  select(-type) %>%
  mutate(
    trait = as.factor(rowname),
    trait = factor(trait, levels = c(trait_order, "Prop.Var")),
    loading = round(loading, 2)
  ) %>%
  ggplot() +
  geom_tile(aes(mr, trait, fill=loading), show.legend = F) +
  geom_text(aes(mr, trait, label=sprintf("%0.2f", loading)), color = "black") +
  geom_text(data = efa_prop_var, aes(mr, y = 14, label=sprintf("%0.2f", loading)), color = "black") +
  scale_y_discrete(drop=FALSE) +
  scale_x_discrete(position = "top") + 
  scale_fill_gradient2(low = "dodgerblue", mid = "grey90", high = "#FF3333", limits=c(-1.1, 1.1)) +
  facet_wrap(~region, scales = "fixed", ncol = 4) +
  labs(x = "", y = "", title="") +
  PSA_theme

ggsave("figures/EFA_loadings.png", width = 15, height = 10)

2.2.1.3 Replication Criteria (EFA)

Oosterhof and Todorov’s valence-dominance model will be judged to have been replicated in a given world region if the first factor is correlated strongly (loading > .7) with trustworthiness and weakly (loading < .5) with dominance, and the second factor is correlated strongly (loading > .7) with dominance and weakly (loading < .5) with trustworthiness. All these criteria need to be met to conclude that the model was replicated in a given world region.

# apply the replication criteria to the trustworthy/dominant loadings on the
# first two factors (no eigenvalue criterion here, unlike the PCA version)
efa_rep <- efa_analyses %>%
  filter(
    type == "trait", 
    rowname %in% c("trustworthy", "dominant"),
    mr %in% c("Factor 1", "Factor 2")
  ) %>%
  select(-type) %>%
  mutate(rowname = paste(mr, rowname)) %>%
  select(-mr) %>%
  spread(rowname, loading) %>%
  rename(Region = region) %>%
  # replicated iff Factor 1 loads strongly on trustworthy (> .7) but weakly
  # on dominant (< .5), and vice versa for Factor 2
  mutate(Replicated = ifelse(
    `Factor 1 dominant` < .5 & `Factor 1 trustworthy` > .7 & 
    `Factor 2 dominant` > .7 & `Factor 2 trustworthy` < .5,
    "Yes", "No"
  )) %>%
  mutate_if(is.numeric, round, 3) %>%
  knitr::kable("html", col.names = c("Region", "Dominant", "Trustworthy", "Dominant", "Trustworthy", "Replicated")) %>%
  add_header_above(c(" " = 1, "Factor 1" = 2, "Factor 2" = 2, "  " = 1)) %>%
  kable_styling("striped")

save_kable(efa_rep, "figures/EFA_rep_criteria.html")

efa_rep
Factor 1
Factor 2
Region Dominant Trustworthy Dominant Trustworthy Replicated
(Oosterhof & Todorov, 2008) 0.228 0.826 0.970 -0.288 Yes
Africa 0.200 0.786 0.796 -0.133 Yes
Asia 0.110 0.236 0.487 0.761 No
Australia & New Zealand 0.157 0.730 0.873 -0.078 Yes
Central America & Mexico 0.142 0.662 0.831 -0.311 No
Eastern Europe 0.750 0.843 0.609 -0.322 No
Middle East 0.427 0.112 0.566 -0.699 No
Scandinavia 0.428 0.744 0.806 -0.304 Yes
South America 0.278 0.255 0.757 -0.472 No
UK 0.265 0.510 0.766 -0.299 No
USA & Canada 0.320 0.426 0.711 -0.335 No
Western Europe 0.111 0.398 0.869 -0.172 No

2.2.2 Factor Congruence (EFA)

This analysis determines the congruence between the factors from Oosterhof & Todorov (2008) and the factors in each world region, using the psych::factor.congruence function. Congruence is labeled “not similar” for values < 0.85, “fairly similar” for values < 0.95, and “equal” for values >= 0.95.

# get loadings for original O&T2008
ot2008_efa_loadings <- efa_analyses %>%
  filter(region == "(Oosterhof & Todorov, 2008)", type == "trait") %>%
  select(-region, -type) %>%
  spread(mr, loading) %>%
  column_to_rownames() # trait names become rownames

# Congruence between each region's first two EFA factors and the
# corresponding O&T 2008 factors, computed region by region.
fc_efa <- efa_analyses %>%
  filter(type == "trait", region != "(Oosterhof & Todorov, 2008)") %>%
  select(-type) %>%
  spread(mr, loading) %>%
  group_by(region) %>%
  nest() %>%
  mutate(fc = map(data, function(d) {
    # this region's first two factors, rows ordered by trait name so they
    # line up with the O&T loading matrix
    region_loadings <- d %>%
      as.data.frame() %>%
      select(rowname, `Factor 1`, `Factor 2`) %>%
      arrange(rowname) %>%
      column_to_rownames()

    congruence <- psych::factor.congruence(
      region_loadings,
      ot2008_efa_loadings,
      digits = 4
    )

    congruence %>%
      as.data.frame() %>%
      rownames_to_column(var = "regionMR")
  })) %>%
  select(-data) %>%
  unnest(fc) %>%
  ungroup()

# label congruence values and build the comparison table; only diagonal
# pairings (region F1 vs O&T F1, region F2 vs O&T F2) are reported
mr_fc_table <- fc_efa %>%
  gather(origMR, congruence, `Factor 1`:`Factor 2`) %>%
  mutate(sig = case_when(
           congruence < .85 ~ "not similar",
           congruence < .95 ~ "fairly similar",
           congruence >= .95 ~ "equal"
         ),
         congruence = sprintf("%0.3f", congruence)) %>%
  filter(regionMR == origMR) %>%
  select(region, MR = regionMR, congruence, sig) %>%
  gather(k, v, congruence, sig) %>%
  unite(MR, MR, k, remove = T) %>%
  spread(MR, v) %>%
  knitr::kable("html", digits = 3, align = 'lrlrl', 
               col.names = c("Region", "Loading", "Congruence", "Loading", "Congruence")) %>%
  add_header_above(c(" " = 1, "Factor 1" = 2, "Factor 2" = 2)) %>%
  kable_styling("striped")


save_kable(mr_fc_table, "figures/EFA_factor_congruence.html")

mr_fc_table
Factor 1
Factor 2
Region Loading Congruence Loading Congruence
Africa 0.894 fairly similar 0.900 fairly similar
Asia 0.765 not similar -0.090 not similar
Australia & New Zealand 0.810 not similar 0.933 fairly similar
Central America & Mexico 0.777 not similar 0.972 equal
Eastern Europe 0.891 fairly similar 0.957 equal
Middle East 0.736 not similar 0.855 fairly similar
Scandinavia 0.884 fairly similar 0.980 equal
South America 0.682 not similar 0.956 equal
UK 0.774 not similar 0.977 equal
USA & Canada 0.772 not similar 0.975 equal
Western Europe 0.774 not similar 0.949 fairly similar